library(dplyr)
pops<-c("ALL","GWD","MSL","YRI","ESN","LWK")
genes = c("IFNG", "IL10", "IL13", "IL4", "IL5", "STAT6", "CTLA4", "FCN2", "COLEC11", "ABO", "IL17A",   "IL17B",   "CRP", "IL6R", "IL17F",   "IL9", "CD14", "CXCL14",  "IL3", "IL12B",   "VEGFA",  "CTGF", "IL22RA2", "NOS3", "SHH")

popdf<-list()
ranges<-list()
minX<-list()
#intialise ranges (the length of each gene regions)
for (gene in genes){
  ranges[[gene]] = 0
  minX[[gene]] = 500000000
}
#open all data files and get maximum lenght of each gene region from start of first haplotype block to end of last
for (pop in pops){
  popdf[[pop]] <- data.frame(read.table(paste("Summary.HapStats.1kg.h3a",pop,"txt",sep="."),header = TRUE))
  for (gene in genes){
    starts<-popdf[[pop]]  %>% filter(popdf[[pop]]$Gene == gene)
    rng <- max(starts$BlockEnd)  - min(starts$BlockStart)
    if(rng > ranges[[gene]]){
      ranges[[gene]] = rng
    }
    if(minX[[gene]] > min(starts$BlockStart)){
      minX[[gene]] = min(starts$BlockStart)
    }
  }
}
#read in exon co-ordinates
exons<-data.frame(read.table("exons.ensGRCh37.txt",header=FALSE))
colnames(exons) <- c("EnsGeneID", "EnsGeneVer", "ExonStart", "ExonEnd","ExonId","Gene")

#set output file and set layout and margins of individidual plots
pdf(file="GeneHapBlockPlots.pdf",paper = "a4",width = 7, height = 10.5)
par(mfrow=c(7,2),mar = c(3, 3, 1, 3))
cols = rainbow(9) #9 is the max number of blocks in a gene

#make plot for each gene
for (gene in genes){
  maxX = 100;
  
  yOffset = 5
  yPos = yOffset
  rng = ranges[[gene]]
  #initialise plot
  plot(c(0, 1.05 * maxX), c(0, 58), type= "n",  xlab = "", ylab = "", yaxt = "n", xaxt = "n")
  
  #set up y axis
  #yAxTicks = c(1.5 * yOffset, length(pops) * 1.5 * yOffset, length(pops) - 1)
  yAxTicks = seq(from = 1.5 * yOffset, to = (length(pops) + 1) * 1.5 * yOffset, by = 1.5 * yOffset)
  axis(side=2, at=yAxTicks, labels = FALSE)
  text(par("usr")[1], yAxTicks, labels = c(gene,pops),  pos = 2, xpd = TRUE)
  
  #set x axis
  xAxTicks = seq (from = 0, to = 1.05 * maxX , by = 20)
  xAxLabs = round(seq(from = 0, to = (1.05 * rng/1000), by = ((1.05 * rng/1000)/5)),digits =1)
  axis(side=1, at=xAxTicks, labels = FALSE)
  text( xAxTicks, par("usr")[3], labels = xAxLabs,  pos = 1, xpd = TRUE)
  title(xlab=paste("Position (kb) in ", gene, sep = ""), line=1.5, cex.lab=1.2)
  
  #add exons
  geneEx <- exons  %>% filter(exons$Gene == gene)
  #Add line between exons
  lineEnd = maxX * ((max(geneEx$ExonStart) - minX[[gene]])/rng)
  lineStart = maxX * ((min(geneEx$ExonEnd) - minX[[gene]])/rng)
  segments( lineStart, yPos + yOffset/2,lineEnd, yPos + yOffset/2, col="darkgrey", lwd=2)
  for (row in 1:nrow(geneEx)) {
    #draw exons
    start = maxX * ((geneEx$ExonStart[row] - minX[[gene]])/rng)
    end = maxX * ((geneEx$ExonEnd[row] - minX[[gene]])/rng)
    rect(start, yPos,  end, yPos + yOffset, col = "black",border = "black")
  }
  yPos = yPos + 1.5 * yOffset
  #draw haplotype blocks
  for (pop in pops){
    stars<-popdf[[pop]]  %>% filter(popdf[[pop]]$Gene == gene)
    starts = stars[order(stars$BlockStart),]
    for (row in 1:nrow(starts)) {
      start = maxX * ((starts$BlockStart[row] - minX[[gene]])/rng)
      end = maxX * ((starts$BlockEnd[row] - minX[[gene]])/rng)
      rect(start, yPos,  end, yPos + yOffset, col = cols[row])
    }
    yPos = yPos + 1.5 * yOffset
  }
}
dev.off()